#!/usr/bin/env python3
"""
Create a publication-ready visualization of Table 1 and export its summary statistics
"""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def _format_population(count):
    """Abbreviate a head count as millions ("1.5M") or thousands ("250K")."""
    if count >= 1000000:
        return f"{count/1000000:.1f}M"
    return f"{count/1000:.0f}K"


def _format_usd(amount):
    """Abbreviate a USD amount as billions ("$1.2B") or millions ("$340M")."""
    if amount >= 1000000000:
        return f"${amount/1000000000:.1f}B"
    return f"${amount/1000000:.0f}M"


def _row_facecolor(row_number, deaths):
    """Pick a body-row background: mortality highlight first, else zebra stripe."""
    if deaths > 10000:
        return '#FFE6E6'
    if deaths > 1000:
        return '#FFF3E6'
    return '#F8F9FA' if row_number % 2 == 0 else 'white'


def create_table1_visualization(
    input_path='/home/ubuntu/Table1_Complete_38_Disasters.csv',
    output_path='/home/ubuntu/Table1_Complete_38_Disasters_Visualization.png',
):
    """Render the disaster dataset as a styled table image.

    Reads the CSV at ``input_path``, formats a subset of its columns for
    display, draws them as a matplotlib table with a coloured header,
    alternating row stripes and mortality highlighting, and saves the
    figure to ``output_path`` at 300 dpi.

    Args:
        input_path: CSV file to read. It must contain the columns
            Event_ID, Date, Disaster_Type, Country, Affected_Population,
            Deaths, Languages_Count, Median_Delay_Hours and
            Economic_Loss_USD.
        output_path: Destination of the rendered image.

    Returns:
        None. The image is written to disk and a status line is printed.
    """
    # Read the complete dataset
    df = pd.read_csv(input_path)

    # Select key columns for display
    display_columns = [
        'Event_ID', 'Date', 'Disaster_Type', 'Country', 'Affected_Population',
        'Deaths', 'Languages_Count', 'Median_Delay_Hours', 'Economic_Loss_USD'
    ]
    display_df = df[display_columns].copy()

    # Format the data for better display
    display_df['Affected_Population'] = display_df['Affected_Population'].apply(_format_population)
    display_df['Deaths'] = display_df['Deaths'].apply(lambda x: f"{x:,}")
    display_df['Economic_Loss_USD'] = display_df['Economic_Loss_USD'].apply(_format_usd)
    display_df['Median_Delay_Hours'] = display_df['Median_Delay_Hours'].apply(lambda x: f"{x:.1f}")

    # Rename columns for better display
    display_df.columns = [
        'Event ID', 'Date', 'Disaster Type', 'Country', 'Affected Pop.',
        'Deaths', 'Languages', 'Median Delay (h)', 'Economic Loss'
    ]
    n_cols = len(display_df.columns)

    # Create the table visualization
    fig, ax = plt.subplots(figsize=(20, 24))
    ax.axis('tight')
    ax.axis('off')

    table = ax.table(cellText=display_df.values,
                     colLabels=display_df.columns,
                     cellLoc='center',
                     loc='center',
                     bbox=[0, 0, 1, 1])

    # Style the table
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(1, 1.8)

    # Header styling (row 0 of the table holds the column labels)
    for col in range(n_cols):
        header_cell = table[(0, col)]
        header_cell.set_facecolor('#2E86AB')
        header_cell.set_text_props(weight='bold', color='white')
        header_cell.set_height(0.08)

    # Body rows: the colour depends only on the row, so it is decided once
    # per row from the raw (unformatted) death count, not once per cell.
    for row_number, deaths in enumerate(df['Deaths'], start=1):
        facecolor = _row_facecolor(row_number, int(deaths))
        for col in range(n_cols):
            cell = table[(row_number, col)]
            cell.set_facecolor(facecolor)
            cell.set_height(0.06)

    # Add title
    fig.suptitle('Table 1: Comprehensive Dataset of 38 Climate Disasters (2005-2024)\n' +
                 'Translation Delays, Mortality Outcomes, and Covariates',
                 fontsize=16, fontweight='bold', y=0.98)

    # Add subtitle with summary statistics
    total_deaths = df['Deaths'].sum()
    total_affected = df['Affected_Population'].sum()
    median_delay = df['Median_Delay_Hours'].median()

    subtitle = f'Total Deaths: {total_deaths:,} | Total Affected: {total_affected/1000000:.1f}M | Median Translation Delay: {median_delay:.1f} hours'
    fig.text(0.5, 0.95, subtitle, ha='center', fontsize=12, style='italic')

    # Add footnote
    footnote = ('Note: Economic losses in billions (B) or millions (M) USD. Population affected in millions (M) or thousands (K). '
                'Translation delays represent median values across all languages per event. '
                'Red shading indicates >10,000 deaths; orange shading indicates >1,000 deaths.')
    fig.text(0.5, 0.02, footnote, ha='center', fontsize=10, wrap=True)

    # tight_layout() does not account for figure-level text, so reserve a
    # band at the top (title + subtitle) and at the bottom (footnote);
    # otherwise the axes-filling table is laid out underneath them.
    fig.tight_layout(rect=(0, 0.04, 1, 0.94))
    fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)

    print("Table 1 visualization created successfully!")

def create_summary_statistics_table(
    input_path='/home/ubuntu/Table1_Complete_38_Disasters.csv',
    output_path='/home/ubuntu/Table1_Summary_Statistics.csv',
):
    """Write a two-column (Metric, Value) summary of the disaster dataset.

    Reads the CSV at ``input_path``, derives ten headline figures and
    writes them, already formatted as strings, to ``output_path``.

    Args:
        input_path: CSV file to read. It must contain the columns Deaths,
            Affected_Population, Economic_Loss_USD, Median_Delay_Hours,
            Mean_Delay_Hours, Languages_Count, Date, Region and Country.
        output_path: Destination of the summary CSV (written without an
            index column).

    Returns:
        None. The CSV is written to disk and a status line is printed.
    """
    df = pd.read_csv(input_path)

    # Count events per region once; value_counts() sorts by frequency, so
    # the first entry is the region with the most events.
    region_counts = df['Region'].value_counts()
    if region_counts.empty:
        # No non-null Region values: report a placeholder instead of
        # failing with IndexError on index[0].
        most_affected = "N/A"
    else:
        most_affected = f"{region_counts.index[0]} ({region_counts.iloc[0]} events)"

    # Each metric label sits next to its value so the two cannot drift apart.
    rows = [
        ('Total Events', f"{len(df):,}"),
        ('Total Deaths', f"{df['Deaths'].sum():,}"),
        ('Total Affected Population',
         f"{df['Affected_Population'].sum()/1000000:.1f} million"),
        ('Total Economic Loss (USD)',
         f"${df['Economic_Loss_USD'].sum()/1000000000:.1f} billion"),
        ('Median Translation Delay (hours)',
         f"{df['Median_Delay_Hours'].median():.1f}"),
        ('Mean Translation Delay (hours)',
         f"{df['Mean_Delay_Hours'].mean():.1f}"),
        ('Languages per Event (median)',
         f"{df['Languages_Count'].median():.0f}"),
        # min()/max() operate on the Date column exactly as it was read from the CSV.
        ('Date Range', f"{df['Date'].min()} to {df['Date'].max()}"),
        ('Geographic Coverage',
         f"{df['Region'].nunique()} regions, {df['Country'].nunique()} countries"),
        ('Most Affected Region', most_affected),
    ]

    summary_df = pd.DataFrame(rows, columns=['Metric', 'Value'])
    summary_df.to_csv(output_path, index=False)

    print("Summary statistics table created!")

def main():
    """Generate the Table 1 image and summary CSV, then print a file listing.

    Calls ``create_table1_visualization()`` and
    ``create_summary_statistics_table()`` with their default paths.
    """
    create_table1_visualization()
    create_summary_statistics_table()

    # NOTE(review): this script only writes the visualization PNG and the
    # summary-statistics CSV. The other files listed below are presumably
    # produced by companion scripts; confirm before relying on this listing.
    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\nAll Table 1 files created:")
    print("- Table1_Complete_38_Disasters.csv (raw data)")
    print("- Table1_Formatted_Display.csv (formatted data)")
    print("- Table1_Complete_38_Disasters_Visualization.png (visual table)")
    print("- Table1_Summary_Statistics.csv (summary stats)")
    print("- Language_Details_73_Languages.csv (detailed language data)")


# Run the full pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()

